cfs <- read_rds("C:/Users/neham/Desktop/DS401/DS 401 Spring 2024/Source Data/CGC_CFS_2018-2023.rds")
Grouped by jurisdiction and cfs_type, count the occurrences, for exploration.
cfs_type_per_jurisdiction <- cfs %>%
group_by(jurisdiction, cfs_type) %>%
summarise(count = n())
## `summarise()` has grouped output by 'jurisdiction'. You can override using the
## `.groups` argument.
#View(cfs_type_per_jurisdiction)
Create a new column that categorizes ‘Weekday’ or ‘Weekend’
cfs <- cfs %>%
mutate(weekdays = ifelse(cfs_weekday %in% 2:6, "Weekday", "Weekend"))
Excluding Other category, since a lot of the data is classified as “Other”. Can look more closely at substance related categories
## `summarise()` has grouped output by 'jurisdiction'. You can override using the
## `.groups` argument.
# Count occurrences of each call source
call_source_counts <- cfs %>%
group_by(jurisdiction) %>%
summarise(count = n()) %>%
arrange(desc(count))
ggplot(call_source_category_counts, aes(x = reorder(jurisdiction, -count), y = count / sum(count), fill = category)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Distribution of Calls from Different Sources by Category (Excluding 'Other')",
x = "Call Source",
y = "Percentage",
fill = "Call Category") +
scale_y_continuous(labels = scales::percent_format(scale = 1)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_viridis_d() # Adjust the color scale as needed
This plot looks at the monthly distribution of calls by weekday
ggplot(cfs, aes(x = cfs_month, fill = factor(cfs_weekday))) +
geom_bar(position = "stack") +
labs(title = "Monthly Distribution of Calls by Weekday",
x = "Month",
y = "Count",
fill = "Weekday") +
theme_minimal()
Similar plot looking at the distribution of calls by category and weekday
ggplot(cfs, aes(x = weekdays, fill = category)) +
geom_bar(position = "stack") +
labs(title = "Distribution of Calls by Category and Weekday",
x = "Weekday",
y = "Count",
fill = "Category") +
theme_minimal()
Filter the categories column for substance-related categories. Plot that shows the distribution of substance-related calls by weekday.
substance_categories <- c("Substance-On Premise", "Substance-Off Premise", "Substance-Driving")
substance_cfs <- cfs %>%
filter(category %in% substance_categories)
ggplot(substance_cfs, aes(x = factor(cfs_weekday), fill = category)) +
geom_bar(position = "stack") +
labs(title = "Distribution of Substance-Related Calls by Weekday",
x = "Weekday",
y = "Count",
fill = "Category") +
scale_x_discrete(labels = c("Sun", "Mon", "Tues", "Wed", "Thurs", "Fri", "Sat")) +
theme_minimal()
substance_categories <- c("Substance-On Premise", "Substance-Off Premise", "Substance-Driving")
substance_cfs <- cfs %>%
filter(category %in% substance_categories)
substance_cfs_counts <- substance_cfs %>%
group_by(cfs_weekday, category) %>%
summarise(count = n()) %>%
arrange(cfs_weekday, desc(count))
## `summarise()` has grouped output by 'cfs_weekday'. You can override using the
## `.groups` argument.
substance_cfs_percentages <- substance_cfs_counts %>%
group_by(cfs_weekday) %>%
mutate(percentage = count / sum(count) * 100)
ggplot(substance_cfs_percentages, aes(x = factor(cfs_weekday), y = percentage, fill = category)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Distribution of Substance-Related Calls by Weekday",
x = "Weekday",
y = "Percentage",
fill = "Category") +
scale_x_discrete(labels = c("Sun", "Mon", "Tues", "Wed", "Thurs", "Fri", "Sat")) +
scale_y_continuous(labels = scales::percent_format(scale = 1)) +
theme_minimal()
Distribute across the 3 PD’s
substance_categories <- c("Substance-On Premise", "Substance-Off Premise", "Substance-Driving")
substance_cfs <- cfs %>%
filter(category %in% substance_categories)
# Create a plot with facets for each combination of weekday and call source
ggplot(substance_cfs, aes(x = factor(cfs_weekday), fill = category)) +
geom_bar(position = "stack") +
facet_grid(. ~ jurisdiction) +
labs(title = "Distribution of Substance-Related Calls by Weekday and Call Source",
x = "Weekday",
y = "Count",
fill = "Category") +
scale_x_discrete(labels = c("Sun", "Mon", "Tues", "Wed", "Thurs", "Fri", "Sat")) +
theme_minimal()
substance_cfs_counts <- substance_cfs %>%
group_by(jurisdiction, cfs_weekday, category) %>%
summarise(count = n()) %>%
arrange(cfs_weekday, jurisdiction, desc(count))
## `summarise()` has grouped output by 'jurisdiction', 'cfs_weekday'. You can
## override using the `.groups` argument.
substance_cfs_proportions <- substance_cfs_counts %>%
group_by(jurisdiction, cfs_weekday) %>%
mutate(proportion = count / sum(count))
ggplot(substance_cfs_proportions, aes(x = factor(cfs_weekday), y = proportion, fill = category)) +
geom_bar(stat = "identity", position = "stack") +
facet_grid(. ~ jurisdiction) +
labs(title = "Distribution of Substance-Related Calls by Weekday and Call Source",
x = "Weekday",
y = "Proportion",
fill = "Category") +
scale_x_discrete(labels = c("Sun", "Mon", "Tues", "Wed", "Thurs", "Fri", "Sat")) +
scale_y_continuous(labels = scales::percent_format(scale = 1)) +
theme_minimal()
cfs$cfs_date <- as.Date(cfs$cfs_date)
cfs$year <- lubridate::year(cfs$cfs_date)
cfs$month <- lubridate::month(cfs$cfs_date)
# Create a time series plot for calls over months with lines for each year
ggplot(cfs, aes(x = month, y = ..count.., group = year, color = as.factor(year))) +
geom_line(stat = "count", binwidth = 30) + # Binwidth set to approximately one month
labs(title = "Calls Over Months by Year",
x = "Month",
y = "Number of Calls",
color = "Year") +
scale_x_continuous(breaks = 1:12, labels = month.abb) + # Show month abbreviations on x-axis
theme_minimal()
## Warning in geom_line(stat = "count", binwidth = 30): Ignoring unknown
## parameters: `binwidth`
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Create a time series plot for calls over hours of the day with lines for each year
ggplot(cfs, aes(x = cfs_hour, group = year, color = as.factor(year))) +
geom_freqpoly(binwidth = 1, size = 1) + # Binwidth set to 1 hour
labs(title = "Calls Over Hours of the Day by Year",
x = "Hour of the Day",
y = "Number of Calls",
color = "Year") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Convert cfs_date to a Date object if it's not already
cfs$cfs_date <- as.Date(cfs$cfs_date)
# Extract year and month from cfs_date
cfs$year <- lubridate::year(cfs$cfs_date)
cfs$month <- lubridate::month(cfs$cfs_date)
# Filter out "Other" category
filtered_cfs <- cfs %>%
filter(category != "Other")
# Create an interactive time series plot with animation and facets
p <- ggplot(filtered_cfs, aes(x = cfs_hour, color = category, group = interaction(category, year))) +
geom_freqpoly(binwidth = 1, size = 1) +
labs(title = "Temporal Trends in Calls Over Hours of the Day by Category",
x = "Hour of the Day",
y = "Number of Calls",
color = "Category") +
theme_minimal() +
theme(legend.position = "top") +
facet_wrap(~year, scales = 'free_y', ncol = 2)
# Convert ggplot to plotly with animation
p <- ggplotly(p, animation_frame = ~year)
p
Across all years, substance related categories
gb <- readRDS("C:/Users/neham/Desktop/DS401/DS 401 Spring 2024/Source Data/CGC_block_groups.rds")
cfspre <- readRDS("C:/Users/neham/Desktop/DS401/DS 401 Spring 2024/Source Data/CGC_CFS_2018-2023.rds")
grid <-readRDS("C:/Users/neham/Desktop/DS401/DS 401 Spring 2024/Source Data/CGC_grid.rds")
cfs <- st_drop_geometry(cfspre)
coordinates <- st_as_sf(cfs, coords = c("longitude", "latitude"), crs = 4326)
# Filter substance-related calls
substance_categories <- c("Substance-On Premise", "Substance-Off Premise", "Substance-Driving")
substance_cfs <- cfs %>%
filter(category %in% substance_categories)
# Create a spatial object for substance-related calls
substance_coordinates <- st_as_sf(substance_cfs, coords = c("longitude", "latitude"), crs = 4326)
# Count occurrences in grid squares
substance_counts <- substance_coordinates %>%
st_drop_geometry() %>%
group_by(grid_id) %>%
summarise(count = n())
# Merge with grid data
grid_substance <- left_join(grid, substance_counts, by = "grid_id")
# Create map
mapview(grid_substance, zcol = "count", legend = TRUE)
For year 2018
mapview(grid_substance, zcol = "count", legend = TRUE)
For year 2019
mapview(grid_substance, zcol = "count", legend = TRUE)
For year 2020
mapview(grid_substance, zcol = "count", legend = TRUE)
For year 2021
mapview(grid_substance, zcol = "count", legend = TRUE)
For year 2022
mapview(grid_substance, zcol = "count", legend = TRUE)
For year 2023
mapview(grid_substance, zcol = "count", legend = TRUE)
cfs <- readRDS("C:/Users/neham/Desktop/DS401/DS 401 Spring 2024/Source Data/CGC_CFS_2018-2023.rds")
substance_categories <- c("Substance-On Premise", "Substance-Off Premise", "Substance-Driving")
substance_cfs <- cfs %>% filter(category %in% substance_categories)
ui <- fluidPage(
titlePanel("Temporal and Spatial Interaction for Substance-Related Calls"),
sliderInput("hourSlider", "Select Hour:", min = 0, max = 23, value = 0, step = 1),
leafletOutput("map")
)
server <- function(input, output) {
# Filter data based on selected hour
filtered_data <- reactive({
substance_cfs %>%
filter(cfs_hour == input$hourSlider)
})
category_colors <- c("#66c2a5", "#fc8d62", "#8da0cb")
# Create map
output$map <- renderLeaflet({
leaflet() %>%
addTiles() %>%
addCircleMarkers(data = filtered_data(),
~longitude, ~latitude,
popup = paste("Hour: ", input$hourSlider),
label = ~category,
color = ~factor(category, levels = substance_categories, labels = category_colors),
fillOpacity = 0.8)
})
}
shinyApp(ui, server)
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
# Load your data
cfs <- readRDS("C:/Users/neham/Desktop/DS401/DS 401 Spring 2024/Source Data/CGC_CFS_2018-2023.rds")
substance_categories <- c("Substance-On Premise", "Substance-Off Premise", "Substance-Driving")
substance_cfs <- cfs %>% filter(category %in% substance_categories)
# Define UI
ui <- fluidPage(
titlePanel("Temporal and Spatial Interaction for Substance-Related Calls"),
sliderInput("yearSlider", "Select Year:", min = 2018, max = 2023, value = 2018, step = 1),
leafletOutput("map")
)
# Define server
server <- function(input, output) {
# Filter data based on selected year
filtered_data <- reactive({
substance_cfs %>%
filter(cfs_year == input$yearSlider)
})
category_colors <- c("#66c2a5", "#fc8d62", "#8da0cb")
# Create map
output$map <- renderLeaflet({
leaflet() %>%
addTiles() %>%
addCircleMarkers(data = filtered_data(),
~longitude, ~latitude,
popup = paste("Year: ", input$yearSlider),
label = ~category,
color = ~factor(category, levels = substance_categories, labels = category_colors),
fillOpacity = 0.8)
})
}
# Run the application
shinyApp(ui, server)
cfs$substance_related <- ifelse(cfs$category %in% substance_categories, 1, 0)
logistic_model <- glm(substance_related ~ as.factor(cfs_weekday), data = cfs, family = "binomial")
summary(logistic_model)
##
## Call:
## glm(formula = substance_related ~ as.factor(cfs_weekday), family = "binomial",
## data = cfs)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.65133 0.06983 -66.605 < 2e-16 ***
## as.factor(cfs_weekday)2 -0.30731 0.10232 -3.003 0.002671 **
## as.factor(cfs_weekday)3 -0.37219 0.10424 -3.570 0.000356 ***
## as.factor(cfs_weekday)4 -0.29300 0.10233 -2.863 0.004191 **
## as.factor(cfs_weekday)5 -0.11029 0.09770 -1.129 0.258989
## as.factor(cfs_weekday)6 -0.12131 0.09674 -1.254 0.209862
## as.factor(cfs_weekday)7 0.15524 0.09285 1.672 0.094560 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 16760 on 175620 degrees of freedom
## Residual deviance: 16716 on 175614 degrees of freedom
## AIC: 16730
##
## Number of Fisher Scoring iterations: 7
odds_ratios <- exp(coef(logistic_model))
odds_ratios
## (Intercept) as.factor(cfs_weekday)2 as.factor(cfs_weekday)3
## 0.009548851 0.735425846 0.689220832
## as.factor(cfs_weekday)4 as.factor(cfs_weekday)5 as.factor(cfs_weekday)6
## 0.746020073 0.895578501 0.885762105
## as.factor(cfs_weekday)7
## 1.167932762